#!/bin/bash
# Run from the directory that contains this script so that the relative
# inc/ and conf/ paths below resolve no matter where the caller invoked it.
# The path is quoted so directories containing whitespace work, and the script
# stops if the directory cannot be entered instead of sourcing from elsewhere.
cd -- "$(dirname -- "$0")" || { echo "Cannot change to the script directory" >&2; exit 1; }
# Helper commands used throughout this script (statusecho, die, run-manage, ...)
# are not defined in this file; presumably they come from inc/common.bash.
source inc/common.bash
# Import settings (data file paths and lists) for this run.
source conf/initial-import.conf

# Handler for SIGINT (Ctrl-C): emit a blank line, report the cancellation
# through errorecho (defined outside this file) and stop with exit status 1.
sigint() {
	printf '\n'
	errorecho "User cancelled"
	exit 1
}
trap 'sigint' INT

# Introductory banner: explains what the script does and lists the
# prerequisites the operator has to check before going on.
cat <<EOF
########## Geovision initial data import script ##########

This script will create a database suitable for Geovision and import all the data files specified in scripts/conf/initial-import.conf
This WILL take several hours and DESTROY EVERYTHING IN THE DATABASE!

Before continuing, make sure that

1) You have a running PostgreSQL server, and that Geovision is properly configured to use it in src/geovision/dbconfig.py

2) You have configured the paths to the KEGG database files and UniProt ecs database in scripts/conf/initial-import.conf

3) You have listed all the data files (samples, databases, blasts) you want to import in scripts/conf/initial-import.conf
   Note that it is usually much faster to import as much data that is possible during the initial importing phase.

4) You are running this script either on a local terminal (Linux virtual terminal, X terminal), or in a GNU screen session to prevent any errors caused by e.g. flaky SSH connections.

Hit enter to read more...
EOF

# Pause until the operator presses enter; whatever was typed is overwritten by
# the confirmation prompt below. -r keeps backslashes literal.
read -r INPUT

# Summary of the configuration that is about to be imported.
# The blast listing is limited to the top level of $BLAST_PATH (-maxdepth 1)
# because the import step later in this script only globs $BLAST_PATH/*.align
# and does not descend into subdirectories; a recursive listing would show
# files that are never imported. $BLAST_PATH is quoted so paths containing
# whitespace are handled.
cat <<EOF
Initial import configuration

Uniprot ECS database file: $ECS_PATH
KEGG database file: $KEGG_ENZYME_PATH

Sample files:
$SAMPLES

Databases:
$(printf '%s\n' "$DATABASES" | sed -e 's/|/: /g')

Blasts:
$(find "$BLAST_PATH" -maxdepth 1 -name '*.align')

EOF

# Only the exact answer YES continues; anything else (including EOF on stdin)
# aborts with exit status 0 before the database is touched.
read -r -p "Is the information above correct? Enter 'YES' to continue, anything else to abort: " INPUT

if [ "$INPUT" != 'YES' ]
then
	echo 'Aborting.'
	exit 0
fi

# (Re)create the database schema. run-manage and die are defined outside this
# file (run-manage is presumably a wrapper around Django's manage.py - confirm).
statusecho "(Re)creating database"
run-manage syncdb --noinput || die "Is your database configuration in src/geovision/dbconfig.py correct? Did you remember to start-localpostgres?"
# Stop here if the reset fails: continuing would start a multi-hour import into
# a database whose viz/meta tables are in an unknown state.
run-manage reset --noinput viz meta || die "Could not reset the viz and meta apps; aborting before importing any data."

# Optionally create an administrator account now; only a literal "y" is
# treated as consent. -r keeps backslashes in the answer literal.
read -r -p "Do you want to create a new superuser account? (y/n): " INPUT
if [ "$INPUT" = "y" ]
then
	run-manage createsuperuser
else
	echo 'You can create one later with the create-superuser script'
fi
statusecho "Starting the import process. Go and have a looong coffee break."

# Remove the predefined indexes (and one foreign key constraint) before the
# bulk load to speed up importing. Only stdout is discarded and the result of
# each command is not checked, so a failing statement does not stop the script.
for idx in \
	viz_blast_read_id \
	viz_blast_read_id_like \
	viz_blast_db_entry_id \
	viz_blast_db_entry_id_like \
	viz_blastecs_db_entry_id_like \
	viz_dbuniprotecs_db_id_like
do
	run-psql -c "DROP INDEX \"${idx}\";" > /dev/null
done
run-psql -c 'ALTER TABLE viz_dbuniprotecs DROP CONSTRAINT viz_dbuniprotecs_db_id_fkey;' > /dev/null

# Bulk import, in order: utility databases, samples, databases, blasts.
# The import-* commands are defined outside this file; their exit status is
# not checked here, so one failing file does not stop the remaining imports.
statusecho "Importing utility databases"
import-kegg "$KEGG_ENZYME_PATH"
import-uniprotecs "$ECS_PATH" --quiet

statusecho "Importing samples"
# $SAMPLES is deliberately left unquoted: it is a whitespace-separated list and
# relies on word splitting. Each resulting path is quoted when passed on.
for SAMPLEFILE in $SAMPLES
do
	import-sample "$SAMPLEFILE"
done

statusecho "Importing databases"
# Each whitespace-separated entry of $DATABASES has the form NAME|FILE.
for DB in $DATABASES
do
	# First '|'-separated field is the name, second field is the file. An entry
	# without '|' yields the whole entry for both, matching what cut -f1/-f2 did,
	# but without spawning subshells or re-globbing the entry.
	DBNAME=${DB%%|*}
	DBFILE=${DB#*|}
	DBFILE=${DBFILE%%|*}
	import-database "$DBFILE" "$DBNAME"
done

statusecho "Importing blasts"
for BLASTFILE in "$BLAST_PATH"/*.align
do
	# When nothing matches, the glob stays as the literal pattern; skip it
	# instead of handing a non-existent path to import-blast.
	[ -e "$BLASTFILE" ] || continue
	import-blast "$BLASTFILE" --no-warning
done

statusecho "Creating indexes"
# Indexes for viz_blast and viz_dbuniprotecs, created after the bulk load.
# Assuming run-psql forwards its options to psql, -1 wraps the statements in a
# single transaction, -a echoes the input and -c supplies the command string.
blast_index_sql="CREATE INDEX viz_blast_read_id ON viz_blast (read_id) WITH (fillfactor = 100);
CREATE INDEX viz_blast_db_entry_id ON viz_blast (db_entry_id) WITH (fillfactor = 100);
CREATE INDEX viz_blast_bitscore ON viz_blast (bitscore) WITH (fillfactor = 100);
CREATE INDEX viz_dbuniprotecs_ec ON viz_dbuniprotecs(ec) WITH (fillfactor = 100) WHERE ec != '?';

"
run-psql -1ac "$blast_index_sql"

# rebuild-cache is defined outside this file. The viz_blastecs indexes are only
# created after it has run (presumably because it fills that table - confirm).
rebuild-cache
blastecs_index_sql="CREATE INDEX viz_blastecs_bitscore ON viz_blastecs (bitscore) WITH (fillfactor = 100);
CREATE INDEX viz_blastecs_ec ON viz_blastecs (ec) WITH (fillfactor=100);"
run-psql -c "$blastecs_index_sql"
statusecho "Initial importing complete"
